{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 100% local Agentic RAG"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/akshaypachaar/miniconda3/envs/env_crewai/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    }
   ],
   "source": [
    "import warnings\n",
    "from crewai import Agent, Crew, Task, LLM, Process\n",
    "from src.agentic_rag.tools.custom_tool import DocumentSearchTool\n",
    "from src.agentic_rag.tools.custom_tool import FireCrawlWebSearchTool\n",
    "\n",
    "warnings.filterwarnings(\"ignore\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Setup LLM\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "llm = LLM(\n",
    "    model=\"ollama/llama3.2\",\n",
    "    base_url=\"http://localhost:11434\"\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Setup Tools"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "pdf_tool = DocumentSearchTool(file_path='/Users/akshaypachaar/Eigen/ai-engineering/agentic_rag/knowledge/dspy.pdf')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Make sure you have the API key for FireCrawl in your environment variables."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "web_search_tool = FireCrawlWebSearchTool()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Agents"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "retriever_agent = Agent(\n",
    "    role=\"\"\"Retrieve relevant information to answer the user query: {query}\"\"\",\n",
    "    goal=\"\"\"Retrieve the most relevant information from the available sources \n",
    "            for the user query: {query}, always try to use the pdf search tool first. \n",
    "            If you are not able to retrieve the information from the pdf search tool \n",
    "            then try to use the web search tool.\"\"\",\n",
    "    backstory=\"\"\"You're a meticulous analyst with a keen eye for detail. \n",
    "                You're known for your ability understand the user query: {query} \n",
    "                and retrieve knowlege from the most suitable knowledge base.\"\"\",\n",
    "    verbose=True,\n",
    "    tools=[\n",
    "        pdf_tool,\n",
    "        web_search_tool\n",
    "    ],\n",
    "    # llm=llm\n",
    ")\n",
    "\n",
    "response_synthesizer_agent = Agent(\n",
    "    role=\"\"\"Response synthesizer agent for the user query: {query}\"\"\",\n",
    "    goal=\"\"\"Synthesize the retrieved information into a concise and coherent response \n",
    "            based on the user query: {query}. If you are not able to retrieve the \n",
    "            information then respond with \"I'm sorry, I couldn't find the information \n",
    "            you're looking for.\"\"\",\n",
    "    backstory=\"\"\"You're a skilled communicator with a knack for turning complex \n",
    "                information into clear and concise responses.\"\"\",\n",
    "    verbose=True,\n",
    "    # llm=llm\n",
    ")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Tasks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "retrieval_task = Task(\n",
    "    description=\"\"\"Retrieve the most relevant information from the available \n",
    "                   sources for the user query: {query}\"\"\",\n",
    "    expected_output=\"\"\"The most relevant information in form of text as retrieved\n",
    "                       from the sources.\"\"\",\n",
    "    agent=retriever_agent\n",
    ")\n",
    "\n",
    "response_task = Task(\n",
    "    description=\"\"\"Synthesize the final response for the user query: {query}\"\"\",\n",
    "    expected_output=\"\"\"A concise and coherent response based on the retrieved infromation\n",
    "                       from the right source for the user query: {query}. If you are not \n",
    "                       able to retrieve the information then respond with \n",
    "                       I'm sorry, I couldn't find the information you're looking for.\"\"\",\n",
    "    agent=response_synthesizer_agent\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Initialize Crew"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "crew = Crew(\n",
    "\t\t\tagents=[retriever_agent, response_synthesizer_agent], \n",
    "\t\t\ttasks=[retrieval_task, response_task],\n",
    "\t\t\tprocess=Process.sequential,\n",
    "\t\t\tverbose=True,\n",
    "\t\t\t# process=Process.hierarchical, # In case you wanna use that instead https://docs.crewai.com/how-to/Hierarchical/\n",
    "\t\t)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Kickoff Crew"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[1m\u001b[95m# Agent:\u001b[00m \u001b[1m\u001b[92mRetrieve relevant information to answer the user query: When is Australian open 2025 happening?\u001b[00m\n",
      "\u001b[95m## Task:\u001b[00m \u001b[92mRetrieve the most relevant information from the available \n",
      "                   sources for the user query: When is Australian open 2025 happening?\u001b[00m\n",
      "\n",
      "\n",
      "\u001b[1m\u001b[95m# Agent:\u001b[00m \u001b[1m\u001b[92mRetrieve relevant information to answer the user query: When is Australian open 2025 happening?\u001b[00m\n",
      "\u001b[95m## Thought:\u001b[00m \u001b[92mI will start by searching the document for information regarding the dates of the Australian Open in 2025.\u001b[00m\n",
      "\u001b[95m## Using tool:\u001b[00m \u001b[92mDocumentSearchTool\u001b[00m\n",
      "\u001b[95m## Tool Input:\u001b[00m \u001b[92m\n",
      "\"{\\\"query\\\": \\\"Australian Open 2025 dates\\\"}\"\u001b[00m\n",
      "\u001b[95m## Tool Output:\u001b[00m \u001b[92m\n",
      "02406, 2022.\n",
      "\n",
      "\n",
      "___\n",
      " In International Conference on Machine\n",
      "Learning, pp.\n",
      "___\n",
      " arXiv preprint arXiv:2305.03495, 2023.\n",
      "\n",
      "Peng Qi, Xiaowen Lin, Leo Mehr, Zijian Wang, and Christopher D. Manning. Answering complex\n",
      "In Proceedings of the 2019 Con-\n",
      "\n",
      "___\n",
      "Takuya Akiba, Shotaro Sano, Toshihiko Yanase, Takeru Ohta, and Masanori Koyama. Optuna:\n",
      "In Proceedings of the 25th ACM\n",
      "A next-generation hyperparameter optimization framework.\n",
      "\n",
      "___\n",
      " arXiv preprint arXiv:1809.09600, 2018.\n",
      "\n",
      "\n",
      "___\n",
      "Q: Olivia has $23. She bought five bagels for $3 each. How much money does she have left?\n",
      "\n",
      "# solution in Python:\n",
      "\n",
      "def solution():\n",
      "\n",
      "\"\"\"Olivia has $23. She bought five bagels for $3 each. How much money does she have left?\"\"\"\n",
      "money initial = 23\n",
      "bagels = 5\n",
      "bagel cost = 3\n",
      "money spent = bagels * bagel cost\n",
      "money left = money initial - money spent\n",
      "result = money left\n",
      "return result\n",
      "\n",
      "Q: Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many golf balls did he\n",
      "have at the end of wednesday?\n",
      "\n",
      "# solution in Python:\n",
      "\n",
      "def solution():\n",
      "\n",
      "\"\"\"Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many golf balls\n",
      "\n",
      "did he have at the end of wednesday?\"\"\"\n",
      "\n",
      "golf balls initial = 58\n",
      "golf balls lost tuesday = 23\n",
      "golf balls lost wednesday = 2\n",
      "golf balls left = golf balls initial - golf balls lost tuesday - golf balls lost wednesday\n",
      "result = golf balls left\n",
      "return result\n",
      "\n",
      "Q: There were nine computers in the server room. Five more computers were installed each day, from monday to thursday.\n",
      "How many computers are now in the server room?\n",
      "\n",
      "# solution in Python:\n",
      "\n",
      "def solution():\n",
      "\n",
      "\"\"\"There were nine computers in the server room. Five more computers were installed each day, from monday to thursday.\n",
      "\n",
      "How many computers are now in the server room?\"\"\"\n",
      "\n",
      "computers initial = 9\n",
      "computers per day = 5\n",
      "num days = 4\n",
      "computers added = computers per day * num days\n",
      "computers total = computers initial + computers added\n",
      "result = computers total\n",
      "return result\n",
      "\n",
      "Q: Shawn has five toys. For Christmas, he got two toys each from his mom and dad. How many toys does he have now?\n",
      "\n",
      "# solution in Python:\n",
      "\n",
      "def solution():\n",
      "\n",
      "\"\"\"Shawn has five toys. For Christmas, he got two toys each from his mom and dad. How many toys does he have now?\"\"\"\n",
      "toys initial = 5\n",
      "mom toys = 2\n",
      "dad toys = 2\n",
      "total received = mom toys + dad toys\n",
      "total toys = toys initial + total received\n",
      "\n",
      "___\n",
      "I checked: {query}\n",
      "\n",
      "___\n",
      "======== table info ========\n",
      "{table info}\n",
      "Question: {input}\n",
      "SQLQuery:\n",
      "\n",
      "\n",
      "___\n",
      "# Step 2: Evaluate the generated candidate program .\n",
      "score = evaluate_program ( candidate_program , self . metric , valset )\n",
      "candidates . append (( score , candidate_program ) )\n",
      "\n",
      "# Create a new basic bootstrap few - shot program .\n",
      "shuffled_trainset = shuffle ( trainset , seed = seed )\n",
      "tp = BootstrapFewShot ( metric = metric , max_bootstrap_demos = random_size () )\n",
      "candidate_program = tp . compile ( student , shuffled_trainset , teacher )\n",
      "\n",
      "28\n",
      "\n",
      "\fPreprint\n",
      "\n",
      "E.3 BOOTSTRAPFEWSHOTWITHOPTUNA\n",
      "\n",
      "pool = self . pool\n",
      "\n",
      "def objective ( self , trial ):\n",
      "\n",
      "def __init__ ( self , metric , trials =16) :\n",
      "\n",
      "self . metric = metric\n",
      "self . trials = trials\n",
      "\n",
      "# Step 1: Create copy of student program .\n",
      "candidate_program = self . student . reset_copy ()\n",
      "\n",
      "\n",
      "___\n",
      "1 class SimplifiedBootstrapFewShotWithOptuna ( Teleprompter ) :\n",
      "2\n",
      "3\n",
      "4\n",
      "5\n",
      "6\n",
      "7\n",
      "8\n",
      "9\n",
      "10\n",
      "11\n",
      "12\n",
      "13\n",
      "14\n",
      "15\n",
      "16\n",
      "17\n",
      "18\n",
      "19\n",
      "20\n",
      "21\n",
      "22\n",
      "23\n",
      "24\n",
      "25\n",
      "26\n",
      "27\n",
      "28\n",
      "29\n",
      "30\n",
      "31\n",
      "32\n",
      "33\n",
      "34\n",
      "35\n",
      "36\n",
      "37\n",
      "38\n",
      "39\n",
      "40\n",
      "41\n",
      "42\n",
      "43\n",
      "44\n",
      "\n",
      "print ( ’ Best score : ’, best_program . score )\n",
      "print ( ’ Best program : ’, best_program )\n",
      "\u001b[00m\n",
      "\n",
      "\n",
      "\u001b[1m\u001b[95m# Agent:\u001b[00m \u001b[1m\u001b[92mRetrieve relevant information to answer the user query: When is Australian open 2025 happening?\u001b[00m\n",
      "\u001b[95m## Thought:\u001b[00m \u001b[92mThought: It seems that the document search did not yield any relevant information regarding the dates for the Australian Open 2025. I will now search the internet to find the information.\u001b[00m\n",
      "\u001b[95m## Using tool:\u001b[00m \u001b[92mSearch the internet\u001b[00m\n",
      "\u001b[95m## Tool Input:\u001b[00m \u001b[92m\n",
      "\"{\\\"search_query\\\": \\\"Australian Open 2025 schedule dates\\\"}\"\u001b[00m\n",
      "\u001b[95m## Tool Output:\u001b[00m \u001b[92m\n",
      "\n",
      "Search results: Title: 2025 Australian Open Tennis Schedule - Roadtrips\n",
      "Link: https://www.roadtrips.com/tennis-packages/australian-open/schedule/\n",
      "Snippet: 2025 Australian Open Schedule ; 9, Day, Mon, January 20 ; 9, Night, Mon, January 20 ; 10, Day, Tues, January 21 ; 10, Night, Tues, January 21 ...\n",
      "---\n",
      "Title: 2025 Australian Open Schedule of Play - Grand Slam Tennis Tours\n",
      "Link: https://www.grandslamtennistours.com/australian-open/schedule-of-play\n",
      "Snippet: 2025 Australian Open Schedule of Play ; 22 · 23 ; 5:00 PM · 10:00 AM.\n",
      "---\n",
      "Title: Australian Open 2025 dates announced | AO\n",
      "Link: https://ausopen.com/articles/news/australian-open-2025-dates-announced\n",
      "Snippet: Australian Open 2025 dates are set for 6-26 January at Melbourne Park, guaranteeing fans three weeks of thrilling Grand Slam tennis.\n",
      "---\n",
      "Title: Australian Open 2025 draw: How to watch and follow along | AO\n",
      "Link: https://ausopen.com/articles/news/australian-open-2025-draw-how-watch-and-follow-along\n",
      "Snippet: Thursday 9 January marks the date the Australian Open men's and women's singles draws will be revealed. From 2.30pm AEDT, the draw will be ...\n",
      "---\n",
      "Title: Australian Open 2025: Schedule, format and how to watch\n",
      "Link: https://www.usatoday.com/story/sports/tennis/aus/2025/01/02/australian-open-schedule-format-how-to-watch/77360328007/\n",
      "Snippet: Date: Sunday, Jan. 5 to Saturday, Jan. 25 · TV: ESPN family of networks, Tennis Channel · Streaming: ESPN+, Fubo · Location: Multiple venues ( ...\n",
      "---\n",
      "Title: Official Australian Open 2025 Packages & Tickets | Book Now\n",
      "Link: https://events.com.au/tennis/australian-open-2025/\n",
      "Snippet: Australian Open 2025 will start on Sunday, 12 January 2025 and finish Sunday, 26 January 2025. It's a two week long tournament. Where is Australian ...\n",
      "---\n",
      "Title: 2025 Australian Open: Dates, schedule, draw, and odds\n",
      "Link: https://tenngrand.com/2025-australian-open-dates-schedule-draw-and-odds/\n",
      "Snippet: The Australian Open will take place January 13-26 at Melbourne Park in Melbourne, Australia. Qualifying begins on Monday, January 6. Schedule.\n",
      "---\n",
      "Title: Australian Open 2025 provisional schedule: Plan your three weeks ...\n",
      "Link: https://ausopen.com/articles/news/australian-open-2025-provisional-schedule-plan-your-three-weeks-melbourne-park\n",
      "Snippet: The men's singles semifinals – one beginning in the afternoon, one that same evening – are scheduled at Rod Laver Arena on Friday 24 January, ...\n",
      "---\n",
      "\n",
      "\n",
      "\n",
      "You ONLY have access to the following tools, and should NEVER make up tools that are not listed here:\n",
      "\n",
      "Tool Name: DocumentSearchTool\n",
      "Tool Arguments: {'query': {'description': 'Query to search the document.', 'type': 'str'}}\n",
      "Tool Description: Search the document for the given query.\n",
      "Tool Name: Search the internet\n",
      "Tool Arguments: {'search_query': {'description': 'Mandatory search query you want to use to search the internet', 'type': 'str'}}\n",
      "Tool Description: A tool that can be used to search the internet with a search_query.\n",
      "\n",
      "Use the following format:\n",
      "\n",
      "Thought: you should always think about what to do\n",
      "Action: the action to take, only one name of [DocumentSearchTool, Search the internet], just the name, exactly as it's written.\n",
      "Action Input: the input to the action, just a simple python dictionary, enclosed in curly braces, using \" to wrap keys and values.\n",
      "Observation: the result of the action\n",
      "\n",
      "Once all necessary information is gathered:\n",
      "\n",
      "Thought: I now know the final answer\n",
      "Final Answer: the final answer to the original input question\n",
      "\u001b[00m\n",
      "\n",
      "\n",
      "\u001b[1m\u001b[95m# Agent:\u001b[00m \u001b[1m\u001b[92mRetrieve relevant information to answer the user query: When is Australian open 2025 happening?\u001b[00m\n",
      "\u001b[95m## Final Answer:\u001b[00m \u001b[92m\n",
      "The Australian Open 2025 will take place from January 6 to January 26, 2025, at Melbourne Park in Melbourne, Australia.\u001b[00m\n",
      "\n",
      "\n",
      "\u001b[1m\u001b[95m# Agent:\u001b[00m \u001b[1m\u001b[92mResponse synthesizer agent for the user query: When is Australian open 2025 happening?\u001b[00m\n",
      "\u001b[95m## Task:\u001b[00m \u001b[92mSynthesize the final response for the user query: When is Australian open 2025 happening?\u001b[00m\n",
      "\n",
      "\n",
      "\u001b[1m\u001b[95m# Agent:\u001b[00m \u001b[1m\u001b[92mResponse synthesizer agent for the user query: When is Australian open 2025 happening?\u001b[00m\n",
      "\u001b[95m## Final Answer:\u001b[00m \u001b[92m\n",
      "The Australian Open 2025 will take place from January 6 to January 26, 2025, at Melbourne Park in Melbourne, Australia.\u001b[00m\n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "result = crew.kickoff(inputs={\"query\": \"When is Australian open 2025 happening?\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The Australian Open 2025 will take place from January 6 to January 26, 2025, at Melbourne Park in Melbourne, Australia.\n"
     ]
    }
   ],
   "source": [
    "print(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "env_crewai",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
